# APPENDIX B, CODE

library(plyr)
library(ggplot2)
library(zoo)
library(scales)
library(stringr)
library(dplyr)
library(glue)
library(tidyverse)
library(text2vec)
library(lubridate)
library(data.table)
library(quanteda)
library(quanteda.textstats)
library(rddtools)
library(magrittr)
library(stargazer)
library(lmtest)
library(fixest)
library(modelsummary)
library(readxl)
library(cowplot)
library(texreg)
library(rdd)

# Load the prepared speech data, then drop the two non-substantive party codes:
# 1 (the speaker) and 0 (process speeches).
load("path to data_final.rda")
df_final <- subset(df_final, party != 1 & party != 0)

# B1. Full models

# Aggregate

# Neoliberalism

# Outcome: number of neoliberalism dictionary matches in each speech.
df_final[["word"]] <- str_count(
  df_final$tokens_edit,
  "markedsøkonomi|frie marked|markedsreform"
)
df_final_work <- df_final

# Cutoff (9 November 1989) and the running variable, both as numeric dates.
cutpoint_date <- as.Date("1989-11-09")
cutpoint_numeric <- as.numeric(cutpoint_date)
df_final_work[["date_numeric"]] <- as.numeric(df_final_work$date)

# z is 1 for speeches dated strictly after the cutoff and 0 otherwise.
rdd_test <- with(
  df_final_work,
  rdd_data(
    y = word,
    x = date_numeric,
    z = as.numeric(date_numeric > cutpoint_numeric),
    cutpoint = cutpoint_numeric
  )
)
summary(rdd_test)
plot(rdd_test)

# Non-parametric RDD estimate at the bandwidth chosen by rdd_bw_ik().
bw_ik <- rdd_bw_ik(rdd_test)
reg_nonpara <- rdd_reg_np(rdd_object = rdd_test, bw = bw_ik)
print(reg_nonpara)

# Aggregate

# Discrediting socialism

# Outcome: number of socialism-discrediting dictionary matches in each speech.
df_final[["word"]] <- str_count(
  df_final$tokens_edit,
  "plantyranni|planøkono|kommandoøkono"
)
df_final_work <- df_final

# Cutoff (9 November 1989) and the running variable, both as numeric dates.
cutpoint_date <- as.Date("1989-11-09")
cutpoint_numeric <- as.numeric(cutpoint_date)
df_final_work[["date_numeric"]] <- as.numeric(df_final_work$date)

# z is 1 for speeches dated strictly after the cutoff and 0 otherwise.
rdd_test <- with(
  df_final_work,
  rdd_data(
    y = word,
    x = date_numeric,
    z = as.numeric(date_numeric > cutpoint_numeric),
    cutpoint = cutpoint_numeric
  )
)
summary(rdd_test)
plot(rdd_test)

# Non-parametric RDD estimate at the bandwidth chosen by rdd_bw_ik().
bw_ik <- rdd_bw_ik(rdd_test)
reg_nonpara <- rdd_reg_np(rdd_object = rdd_test, bw = bw_ik)
print(reg_nonpara)

# Parties

# Neoliberalism

# Party-level RDD for the neoliberalism dictionary.
#
# For every party code in `parties`, the speeches of that party are extracted,
# dictionary matches are counted per speech, and a non-parametric RDD is fitted
# at the bandwidth returned by rdd_bw_ik(). Each fitted model is stored in
# `results_list_neolib` under the party code (as a character key) and printed.
#
# The counting and subsetting happen inside the loop only; no corpus-wide
# pre-computation is needed because every iteration rebuilds `party_sub`.
parties <- c(2:7, 10, 12)
results_list_neolib <- list()

# The cutoff is identical for all parties, so it is computed once.
cutpoint_date <- as.Date("1989-11-09")
cutpoint_numeric <- as.numeric(cutpoint_date)

for (party in parties) {
  # A failure for one party (e.g. bandwidth selection not possible) is reported
  # and the loop moves on, mirroring the socialism-discrediting party loop.
  tryCatch({
    # Subset data for the current party
    party_sub <- df_final[df_final$party == party, ]

    # Count occurrences of the dictionary terms in each speech
    party_sub$word <- str_count(
      party_sub$tokens_edit,
      "markedsøkonomi|frie marked|markedsreform"
    )

    # Working copy with the numeric running variable
    df_final_work <- party_sub
    df_final_work$date_numeric <- as.numeric(df_final_work$date)

    # RDD data object; z is 1 for speeches dated strictly after the cutoff
    rdd_test <- rdd_data(
      y = df_final_work$word,
      x = df_final_work$date_numeric,
      z = ifelse(df_final_work$date_numeric > cutpoint_numeric, 1, 0),
      cutpoint = cutpoint_numeric
    )

    # Bandwidth selection and non-parametric regression
    bw_ik <- rdd_bw_ik(rdd_test)
    reg_nonpara <- rdd_reg_np(rdd_object = rdd_test, bw = bw_ik)

    # Store results in the list, indexed by party
    results_list_neolib[[as.character(party)]] <- reg_nonpara

    print(reg_nonpara)
  }, error = function(e) {
    message(paste("Error processing party", party, ":", conditionMessage(e)))
  })
}

# Discrediting socialism

# Party-level RDD for the socialism-discrediting dictionary.
#
# For every party code in `parties`, the speeches of that party are extracted,
# dictionary matches are counted per speech, and a non-parametric RDD is fitted
# at the bandwidth returned by rdd_bw_ik(). Each fitted model is stored in
# `results_list_soc` under the party code (as a character key) and printed.
# Parties for which any step fails are reported via message() and skipped.
#
# The counting and subsetting happen inside the loop only; no corpus-wide
# pre-computation is needed because every iteration rebuilds `party_sub`.
parties <- c(2:7, 10, 12)
results_list_soc <- list()

# The cutoff is identical for all parties, so it is computed once.
cutpoint_date <- as.Date("1989-11-09")
cutpoint_numeric <- as.numeric(cutpoint_date)

for (party in parties) {
  tryCatch({
    # Subset data for the current party
    party_sub <- df_final[df_final$party == party, ]

    # Count occurrences of the dictionary terms in each speech
    party_sub$word <- str_count(
      party_sub$tokens_edit,
      "plantyranni|planøkono|kommandoøkono"
    )

    # Working copy with the numeric running variable
    df_final_work <- party_sub
    df_final_work$date_numeric <- as.numeric(df_final_work$date)

    # RDD data object; z is 1 for speeches dated strictly after the cutoff
    rdd_test <- rdd_data(
      y = df_final_work$word,
      x = df_final_work$date_numeric,
      z = ifelse(df_final_work$date_numeric > cutpoint_numeric, 1, 0),
      cutpoint = cutpoint_numeric
    )

    # Bandwidth selection and non-parametric regression
    bw_ik <- rdd_bw_ik(rdd_test)
    reg_nonpara <- rdd_reg_np(rdd_object = rdd_test, bw = bw_ik)

    # Store results in the list, indexed by party
    results_list_soc[[as.character(party)]] <- reg_nonpara

    print(reg_nonpara)
  }, error = function(e) {
    # Report which party failed and continue with the next one
    message(paste("Error processing party", party, ":", conditionMessage(e)))
  })
}

# B2. Replication for discrediting socialism using monthly aggregates

# Social Democrats

# Monthly socialism-discrediting counts for party code 2.
df_final[["yearmon"]] <- format(df_final$date, "%Y-%m")
party_sub <- df_final[df_final$party == 2, ]
party_sub[["word"]] <- str_count(
  party_sub$tokens_edit,
  "plantyranni|planøkono|kommandoøkono"
)
bymonthyear_word <- aggregate(cbind(word) ~ yearmon, data = party_sub, FUN = sum)

# Turn "YYYY-MM" into the first day of that month as a Date.
bymonthyear_word[["yearmon"]] <- as.Date(paste0(bymonthyear_word$yearmon, "-01"))

# Keep months whose year lies in 1974-2009 (bounds included).
bymonthyear_word <- filter(
  bymonthyear_word,
  format(yearmon, "%Y") %between% c("1974", "2009")
)

# Monthly data: the cutoff is the first day of November 1989.
cutpoint_date <- as.Date("1989-11-01")
cutpoint_numeric <- as.numeric(cutpoint_date)
bymonthyear_word[["yearmon_numeric"]] <- as.numeric(bymonthyear_word$yearmon)

rdd_test <- with(
  bymonthyear_word,
  rdd_data(y = word, x = yearmon_numeric, cutpoint = cutpoint_numeric)
)
bw_ik <- rdd_bw_ik(rdd_test)
reg_nonpara <- rdd_reg_np(rdd_object = rdd_test, bw = bw_ik)
print(reg_nonpara)

# Centre Democrats

# Monthly socialism-discrediting counts for party code 10.
df_final[["yearmon"]] <- format(df_final$date, "%Y-%m")
party_sub <- df_final[df_final$party == 10, ]
party_sub[["word"]] <- str_count(
  party_sub$tokens_edit,
  "plantyranni|planøkono|kommandoøkono"
)
bymonthyear_word <- aggregate(cbind(word) ~ yearmon, data = party_sub, FUN = sum)

# Turn "YYYY-MM" into the first day of that month as a Date.
bymonthyear_word[["yearmon"]] <- as.Date(paste0(bymonthyear_word$yearmon, "-01"))

# Keep months whose year lies in 1974-2009 (bounds included).
bymonthyear_word <- filter(
  bymonthyear_word,
  format(yearmon, "%Y") %between% c("1974", "2009")
)

# Monthly data: the cutoff is the first day of November 1989.
cutpoint_date <- as.Date("1989-11-01")
cutpoint_numeric <- as.numeric(cutpoint_date)
bymonthyear_word[["yearmon_numeric"]] <- as.numeric(bymonthyear_word$yearmon)

rdd_test <- with(
  bymonthyear_word,
  rdd_data(y = word, x = yearmon_numeric, cutpoint = cutpoint_numeric)
)
# TOO FEW OBSERVATIONS TO COMPUTE BW, USE BW FROM MAIN MODEL
reg_nonpara <- rdd_reg_np(rdd_object = rdd_test, bw = 5287.674)
print(reg_nonpara)

# Christian Democrats

# Monthly socialism-discrediting counts for party code 12, followed by a
# non-parametric RDD at a fixed bandwidth.
df_final$yearmon <- format(df_final$date, "%Y-%m")
party_sub <- df_final[df_final$party == 12, ]
party_sub$word <- str_count(
  party_sub$tokens_edit,
  "plantyranni|planøkono|kommandoøkono"
)
bymonthyear_word <- aggregate(cbind(word) ~ yearmon, data = party_sub, FUN = sum)

# Turn "YYYY-MM" into the first day of that month as a Date.
bymonthyear_word$yearmon <- as.Date(paste0(bymonthyear_word$yearmon, "-01"))

# Keep months whose year lies in 1974-2009 (bounds included).
bymonthyear_word <- bymonthyear_word %>%
  filter(format(yearmon, "%Y") %between% c("1974", "2009"))

# Monthly data: the cutoff is the first day of November 1989.
cutpoint_date <- as.Date("1989-11-01")
cutpoint_numeric <- as.numeric(cutpoint_date)
bymonthyear_word$yearmon_numeric <- as.numeric(bymonthyear_word$yearmon)

rdd_test <- rdd_data(
  y = bymonthyear_word$word,
  x = bymonthyear_word$yearmon_numeric,
  cutpoint = cutpoint_numeric
)

# TOO FEW OBSERVATIONS TO COMPUTE BW, USE BW FROM MAIN MODEL.
# rdd_bw_ik() is deliberately not called for this party: its result would not
# be used, and a failing call would abort the script before the model is fitted
# (same handling as the Centre Democrats model).
reg_nonpara <- rdd_reg_np(rdd_object = rdd_test, bw = 5287.674)
print(reg_nonpara)

# B3. Placebo tests

# Placebo, half median, neoliberalism
# Placebo cutoff = median date of the speeches on or before the real cutoff.
df_final[["word"]] <- str_count(
  df_final$tokens_edit,
  "markedsøkonomi|frie marked|markedsreform"
)
df_final_work <- df_final
cutpoint_date <- as.Date("1989-11-09")
cutpoint_numeric <- as.numeric(cutpoint_date)
df_final_work[["date_numeric"]] <- as.numeric(df_final_work$date)
df_final_work[["treatment"]] <- as.numeric(
  df_final_work$date_numeric > cutpoint_numeric
)
median_pre <- median(with(df_final_work, date_numeric[treatment == 0]))

rdd_test <- with(
  df_final_work,
  rdd_data(y = word, x = date_numeric, cutpoint = median_pre)
)
bw_ik <- rdd_bw_ik(rdd_test)
reg_placebo_neo <- rdd_reg_np(rdd_object = rdd_test, bw = bw_ik)

# Placebo, half median, socialism-discrediting
df_final[["word"]] <- str_count(
  df_final$tokens_edit,
  "plantyranni|planøkono|kommandoøkono"
)
df_final_work <- df_final
cutpoint_date <- as.Date("1989-11-09")
cutpoint_numeric <- as.numeric(cutpoint_date)
df_final_work[["date_numeric"]] <- as.numeric(df_final_work$date)
df_final_work[["treatment"]] <- as.numeric(
  df_final_work$date_numeric > cutpoint_numeric
)
median_pre <- median(with(df_final_work, date_numeric[treatment == 0]))

rdd_test <- with(
  df_final_work,
  rdd_data(y = word, x = date_numeric, cutpoint = median_pre)
)
# No bandwidth is computed for this outcome (lack of observations); bw_ik still
# holds the value selected for the neoliberalism placebo just above.
reg_placebo_soc <- rdd_reg_np(rdd_object = rdd_test, bw = bw_ik)

# To create table, results have to be manually inserted

# Placebo plot, single dates, neoliberalism

# Outcome: neoliberalism dictionary matches per speech.
df_final[["word"]] <- str_count(
  df_final$tokens_edit,
  "markedsøkonomi|frie marked|markedsreform"
)

df_final_work <- df_final
cutpoint_date <- as.Date("1989-11-09")
cutpoint_numeric <- as.numeric(cutpoint_date)
df_final_work[["date_numeric"]] <- as.numeric(df_final_work$date)

# The sharp model (no z argument) is what the placebo routine is run on.
rdd_test <- with(
  df_final_work,
  rdd_data(y = word, x = date_numeric, cutpoint = cutpoint_numeric)
)
bw_ik <- rdd_bw_ik(rdd_test)
reg_nonpara <- rdd_reg_np(rdd_object = rdd_test, bw = bw_ik)
placebo_data <- plotPlacebo(reg_nonpara, by = 250)

# Plot. x = cutpoint and y = LATE are inherited from the base mapping unless a
# layer overrides y. Layers are added in the order they should be drawn.
placeboplot_neo <- ggplot(placebo_data, aes(x = cutpoint, y = LATE)) +
  # Grey confidence bounds, drawn separately on each side of the true cutoff
  geom_line(aes(y = CI_low), data = subset(placebo_data, position == "left"),
            color = "grey", linetype = "solid") +
  geom_line(aes(y = CI_high), data = subset(placebo_data, position == "left"),
            color = "grey", linetype = "solid") +
  geom_line(aes(y = CI_low), data = subset(placebo_data, position == "right"),
            color = "grey", linetype = "solid") +
  geom_line(aes(y = CI_high), data = subset(placebo_data, position == "right"),
            color = "grey", linetype = "solid") +
  # Red point-estimate lines on each side
  geom_line(data = subset(placebo_data, position == "left"), color = "red") +
  geom_line(data = subset(placebo_data, position == "right"), color = "red") +
  # One point per placebo cutoff
  geom_point(aes(color = position), size = 1) +
  # Label and error bar for the estimate at the true cutoff
  geom_text(aes(label = "TRUE CUTPOINT"),
            data = subset(placebo_data, position == "True"),
            vjust = -0.75, hjust = -0.25) +
  geom_errorbar(aes(ymin = CI_low, ymax = CI_high),
                data = subset(placebo_data, position == "True"),
                width = 200, color = "black") +
  # Zero reference line
  geom_hline(yintercept = 0, color = "black", linetype = "solid") +
  scale_color_manual(
    values = c("left" = "black", "right" = "black", "True" = "black")
  ) +
  labs(x = "Cutpoint (days after Jan 1 1970)", y = "LATE") +
  theme_minimal() +
  theme(legend.position = "none")

# Placebos, neoliberalism, monthly aggregates

# Monthly totals of neoliberalism dictionary matches across all speeches.
df_final[["yearmon"]] <- format(df_final$date, "%Y-%m")
df_final[["word"]] <- str_count(
  df_final$tokens_edit,
  "markedsøkonomi|frie marked|markedsreform"
)
bymonthyear_word <- aggregate(cbind(word) ~ yearmon, data = df_final, FUN = sum)

# Turn "YYYY-MM" into the first day of that month as a Date.
bymonthyear_word[["yearmon"]] <- as.Date(paste0(bymonthyear_word$yearmon, "-01"))

# Keep months whose year lies in 1974-2009 (bounds included).
bymonthyear_word <- filter(
  bymonthyear_word,
  format(yearmon, "%Y") %between% c("1974", "2009")
)
cutpoint_date <- as.Date("1989-11-01")
cutpoint_numeric <- as.numeric(cutpoint_date)
bymonthyear_word[["yearmon_numeric"]] <- as.numeric(bymonthyear_word$yearmon)

rdd_test <- with(
  bymonthyear_word,
  rdd_data(y = word, x = yearmon_numeric, cutpoint = cutpoint_numeric)
)
bw_ik <- rdd_bw_ik(rdd_test)
reg_nonpara <- rdd_reg_np(rdd_object = rdd_test, bw = bw_ik)
placebo_data <- plotPlacebo(reg_nonpara, by = 1)

# Plot. x = cutpoint and y = LATE are inherited from the base mapping unless a
# layer overrides y. Layers are added in the order they should be drawn.
placeboplot_neo_month <- ggplot(placebo_data, aes(x = cutpoint, y = LATE)) +
  # Grey confidence bounds, drawn separately on each side of the true cutoff
  geom_line(aes(y = CI_low), data = subset(placebo_data, position == "left"),
            color = "grey", linetype = "solid") +
  geom_line(aes(y = CI_high), data = subset(placebo_data, position == "left"),
            color = "grey", linetype = "solid") +
  geom_line(aes(y = CI_low), data = subset(placebo_data, position == "right"),
            color = "grey", linetype = "solid") +
  geom_line(aes(y = CI_high), data = subset(placebo_data, position == "right"),
            color = "grey", linetype = "solid") +
  # Red point-estimate lines on each side
  geom_line(data = subset(placebo_data, position == "left"), color = "red") +
  geom_line(data = subset(placebo_data, position == "right"), color = "red") +
  # Only the estimate at the true cutoff gets a point, a label and an error bar
  geom_point(aes(color = position),
             data = subset(placebo_data, position == "True"), size = 1) +
  geom_text(aes(label = "TRUE CUTPOINT"),
            data = subset(placebo_data, position == "True"),
            vjust = -3, hjust = -0.1) +
  geom_errorbar(aes(ymin = CI_low, ymax = CI_high),
                data = subset(placebo_data, position == "True"),
                width = 200, color = "black") +
  # Zero reference line
  geom_hline(yintercept = 0, color = "black", linetype = "solid") +
  scale_color_manual(
    values = c("left" = "black", "right" = "black", "True" = "black")
  ) +
  labs(title = "Placebo, neoliberalism",
       x = "Cutpoint (days after Jan 1 1970)", y = "LATE") +
  theme_minimal() +
  # Centred title, no legend
  theme(plot.title = element_text(hjust = 0.5), legend.position = "none")

# Placebos, socialism-discrediting, monthly aggregates

# Monthly totals of socialism-discrediting dictionary matches across all speeches.
df_final[["yearmon"]] <- format(df_final$date, "%Y-%m")
df_final[["word"]] <- str_count(
  df_final$tokens_edit,
  "plantyranni|planøkono|kommandoøkono"
)
bymonthyear_word <- aggregate(cbind(word) ~ yearmon, data = df_final, FUN = sum)

# Turn "YYYY-MM" into the first day of that month as a Date.
bymonthyear_word[["yearmon"]] <- as.Date(paste0(bymonthyear_word$yearmon, "-01"))

# Keep months whose year lies in 1974-2009 (bounds included).
bymonthyear_word <- filter(
  bymonthyear_word,
  format(yearmon, "%Y") %between% c("1974", "2009")
)
cutpoint_date <- as.Date("1989-11-01")
cutpoint_numeric <- as.numeric(cutpoint_date)
bymonthyear_word[["yearmon_numeric"]] <- as.numeric(bymonthyear_word$yearmon)

rdd_test <- with(
  bymonthyear_word,
  rdd_data(y = word, x = yearmon_numeric, cutpoint = cutpoint_numeric)
)
bw_ik <- rdd_bw_ik(rdd_test)
reg_nonpara <- rdd_reg_np(rdd_object = rdd_test, bw = bw_ik)
placebo_data <- plotPlacebo(reg_nonpara, by = 1)

# Plot. x = cutpoint and y = LATE are inherited from the base mapping unless a
# layer overrides y. Layers are added in the order they should be drawn.
placeboplot_soc_month <- ggplot(placebo_data, aes(x = cutpoint, y = LATE)) +
  # Grey confidence bounds, drawn separately on each side of the true cutoff
  geom_line(aes(y = CI_low), data = subset(placebo_data, position == "left"),
            color = "grey", linetype = "solid") +
  geom_line(aes(y = CI_high), data = subset(placebo_data, position == "left"),
            color = "grey", linetype = "solid") +
  geom_line(aes(y = CI_low), data = subset(placebo_data, position == "right"),
            color = "grey", linetype = "solid") +
  geom_line(aes(y = CI_high), data = subset(placebo_data, position == "right"),
            color = "grey", linetype = "solid") +
  # Red point-estimate lines on each side
  geom_line(data = subset(placebo_data, position == "left"), color = "red") +
  geom_line(data = subset(placebo_data, position == "right"), color = "red") +
  # Only the estimate at the true cutoff gets a point, a label and an error bar
  geom_point(aes(color = position),
             data = subset(placebo_data, position == "True"), size = 1) +
  geom_text(aes(label = "TRUE CUTPOINT"),
            data = subset(placebo_data, position == "True"),
            vjust = -3, hjust = -0.1) +
  geom_errorbar(aes(ymin = CI_low, ymax = CI_high),
                data = subset(placebo_data, position == "True"),
                width = 200, color = "black") +
  # Zero reference line
  geom_hline(yintercept = 0, color = "black", linetype = "solid") +
  scale_color_manual(
    values = c("left" = "black", "right" = "black", "True" = "black")
  ) +
  labs(title = "Placebo, socialism-discrediting",
       x = "Cutpoint (days after Jan 1 1970)", y = "LATE") +
  theme_minimal() +
  # Centred title, no legend
  theme(plot.title = element_text(hjust = 0.5), legend.position = "none")

# Combined plot

# Both monthly placebo plots side by side in a single row.
plot_grid_placebos_month <- plot_grid(
  placeboplot_neo_month,
  placeboplot_soc_month,
  nrow = 1,
  ncol = 2,
  align = "h",
  axis = "b"
)
# B4. Bandwidth sensitivity

# Neoliberalism

# Outcome: neoliberalism dictionary matches per speech.
df_final[["word"]] <- str_count(
  df_final$tokens_edit,
  "markedsøkonomi|frie marked|markedsreform"
)
df_final_work <- df_final
cutpoint_date <- as.Date("1989-11-09")
cutpoint_numeric <- as.numeric(cutpoint_date)
df_final_work[["date_numeric"]] <- as.numeric(df_final_work$date)

# z is 1 for speeches dated strictly after the cutoff and 0 otherwise.
rdd_test <- with(
  df_final_work,
  rdd_data(
    y = word,
    x = date_numeric,
    z = as.numeric(date_numeric > cutpoint_numeric),
    cutpoint = cutpoint_numeric
  )
)
bw_ik <- rdd_bw_ik(rdd_test)
reg_nonpara <- rdd_reg_np(rdd_object = rdd_test, bw = bw_ik)

# Re-estimate over bandwidths 0, 100, ..., 12700 and plot the estimate (blue
# line) with its confidence band (grey ribbon) against the bandwidth.
plot_sensi_neo <- plotSensi(reg_nonpara, from = 0, to = 12700, by = 100) %>%
  ggplot(aes(x = bw, y = LATE)) +
  geom_line(na.rm = TRUE, color = "blue", size = 1) +
  geom_ribbon(aes(ymin = CI_low, ymax = CI_high), fill = "grey80", alpha = 0.5) +
  labs(x = "bw", y = "LATE", title = "Neoliberalism") +
  theme_minimal() +
  theme(plot.title = element_text(hjust = 0.5))

# Socialism-discrediting

# Outcome: socialism-discrediting dictionary matches per speech.
df_final[["word"]] <- str_count(
  df_final$tokens_edit,
  "plantyranni|planøkono|kommandoøkono"
)
df_final_work <- df_final
cutpoint_date <- as.Date("1989-11-09")
cutpoint_numeric <- as.numeric(cutpoint_date)
df_final_work[["date_numeric"]] <- as.numeric(df_final_work$date)

# z is 1 for speeches dated strictly after the cutoff and 0 otherwise.
rdd_test <- with(
  df_final_work,
  rdd_data(
    y = word,
    x = date_numeric,
    z = as.numeric(date_numeric > cutpoint_numeric),
    cutpoint = cutpoint_numeric
  )
)
bw_ik <- rdd_bw_ik(rdd_test)
reg_nonpara <- rdd_reg_np(rdd_object = rdd_test, bw = bw_ik)

# Re-estimate over bandwidths 0, 100, ..., 12700 and plot the estimate (blue
# line) with its confidence band (grey ribbon) against the bandwidth.
plot_sensi_soc <- plotSensi(reg_nonpara, from = 0, to = 12700, by = 100) %>%
  ggplot(aes(x = bw, y = LATE)) +
  geom_line(na.rm = TRUE, color = "blue", size = 1) +
  geom_ribbon(aes(ymin = CI_low, ymax = CI_high), fill = "grey80", alpha = 0.5) +
  labs(x = "bw", y = "LATE", title = "Socialism-discrediting") +
  theme_minimal() +
  theme(plot.title = element_text(hjust = 0.5))

# Combined plot

# Both bandwidth-sensitivity plots side by side in a single row.
plot_grid_sensi <- plot_grid(
  plot_sensi_neo,
  plot_sensi_soc,
  nrow = 1,
  ncol = 2,
  align = "h",
  axis = "b"
)

# B5. Test for sorting

# Neoliberalism

# Monthly totals of neoliberalism dictionary matches across all speeches.
df_final[["yearmon"]] <- format(df_final$date, "%Y-%m")
df_final[["word"]] <- str_count(
  df_final$tokens_edit,
  "markedsøkonomi|frie marked|markedsreform"
)
bymonthyear_word <- aggregate(cbind(word) ~ yearmon, data = df_final, FUN = sum)

# Turn "YYYY-MM" into the first day of that month as a Date.
bymonthyear_word[["yearmon"]] <- as.Date(paste0(bymonthyear_word$yearmon, "-01"))

# Keep months whose year lies in 1974-2009 (bounds included).
bymonthyear_word <- filter(
  bymonthyear_word,
  format(yearmon, "%Y") %between% c("1974", "2009")
)
cutpoint_date <- as.Date("1989-11-01")
cutpoint_numeric <- as.numeric(cutpoint_date)
bymonthyear_word[["yearmon_numeric"]] <- as.numeric(bymonthyear_word$yearmon)

# Density test of the monthly running variable at the cutoff.
DCdensity(bymonthyear_word$yearmon_numeric, cutpoint_numeric)

# Socialism-discrediting

# Monthly totals of socialism-discrediting dictionary matches across all speeches.
df_final[["yearmon"]] <- format(df_final$date, "%Y-%m")
df_final[["word"]] <- str_count(
  df_final$tokens_edit,
  "plantyranni|planøkono|kommandoøkono"
)
bymonthyear_word <- aggregate(cbind(word) ~ yearmon, data = df_final, FUN = sum)

# Turn "YYYY-MM" into the first day of that month as a Date.
bymonthyear_word[["yearmon"]] <- as.Date(paste0(bymonthyear_word$yearmon, "-01"))

# Keep months whose year lies in 1974-2009 (bounds included).
bymonthyear_word <- filter(
  bymonthyear_word,
  format(yearmon, "%Y") %between% c("1974", "2009")
)
cutpoint_date <- as.Date("1989-11-01")
cutpoint_numeric <- as.numeric(cutpoint_date)
bymonthyear_word[["yearmon_numeric"]] <- as.numeric(bymonthyear_word$yearmon)

# Density test of the monthly running variable at the cutoff.
DCdensity(bymonthyear_word$yearmon_numeric, cutpoint_numeric)

# B6. Test with non-linear specifications

# Neoliberalism

# Second-order polynomial (order = 2) parametric RDD within the bandwidth
# chosen by rdd_bw_ik(), for both dictionaries.
#
# Each model is kept under its own name and printed, so the neoliberalism
# result is no longer lost when the socialism-discrediting model is fitted.
# `reg_para_ik` is still assigned at the end and, as before, holds the
# socialism-discrediting model.

# The cutoff is shared by both models.
cutpoint_date <- as.Date("1989-11-09")
cutpoint_numeric <- as.numeric(cutpoint_date)

# Neoliberalism
df_final$word <- str_count(
  df_final$tokens_edit,
  "markedsøkonomi|frie marked|markedsreform"
)
df_final_work <- df_final
df_final_work$date_numeric <- as.numeric(df_final_work$date)
rdd_test <- rdd_data(
  y = df_final_work$word,
  x = df_final_work$date_numeric,
  cutpoint = cutpoint_numeric
)
bw_ik <- rdd_bw_ik(rdd_test)
reg_para_ik_neo <- rdd_reg_lm(rdd_object = rdd_test, order = 2, bw = bw_ik)
print(reg_para_ik_neo)

# Socialism-discrediting

df_final$word <- str_count(
  df_final$tokens_edit,
  "plantyranni|planøkono|kommandoøkono"
)
df_final_work <- df_final
df_final_work$date_numeric <- as.numeric(df_final_work$date)
rdd_test <- rdd_data(
  y = df_final_work$word,
  x = df_final_work$date_numeric,
  cutpoint = cutpoint_numeric
)
bw_ik <- rdd_bw_ik(rdd_test)
reg_para_ik_soc <- rdd_reg_lm(rdd_object = rdd_test, order = 2, bw = bw_ik)
print(reg_para_ik_soc)

# Backward-compatible name: the last model fitted in this section.
reg_para_ik <- reg_para_ik_soc

# B7. Testing with randomly generated dictionaries

# ALTERNATIVE DICTIONARY, RANDOMLY GENERATED #1
# Placebo dictionaries: three tokens are drawn at random from the corpus and
# the main RDD is re-estimated with matches of those tokens as the outcome.
# This is repeated for three seeds (123, 456, 789), giving random1..random3.
#
# Changes relative to a naive "paste the tokens into a regex" approach:
#   * Sampled tokens are escaped before being joined with "|", so a token that
#     contains a regex metacharacter is matched literally instead of being
#     interpreted as a pattern (or raising a regex error).
#   * Empty tokens (produced by consecutive spaces) are removed from the pool;
#     an empty alternative would match at every position of every speech.
#     NOTE(review): if the corpus does contain empty tokens, the words drawn
#     for a given seed differ from a draw over the unfiltered pool.
#   * The corpus is tokenised once instead of once per seed.

# Token pool: all speeches joined and split on single spaces.
all_text <- paste(df_final$tokens_edit, collapse = " ")
all_words <- str_split(all_text, " ")[[1]]
all_words <- all_words[nzchar(all_words)]
if (length(all_words) < 3) {
  stop("The combined strings do not contain enough words to sample.")
}

cutpoint_date <- as.Date("1989-11-09")
cutpoint_numeric <- as.numeric(cutpoint_date)

# Draw three tokens under `seed`, count their literal occurrences per speech in
# `data`, and return the non-parametric RDD fit at the rdd_bw_ik() bandwidth.
# The sampled tokens are printed one per line.
fit_random_dictionary <- function(seed, data, tokens, cutpoint) {
  set.seed(seed)
  sampled_words <- sample(tokens, size = 3, replace = FALSE)
  for (sampled_word in sampled_words) {
    print(sampled_word)
  }

  # Backslash-escape every non-word character so each token is matched literally.
  escaped_words <- str_replace_all(sampled_words, "(\\W)", "\\\\\\1")
  sampled_words_pattern <- paste(escaped_words, collapse = "|")

  data$word <- str_count(data$tokens_edit, sampled_words_pattern)
  data$date_numeric <- as.numeric(data$date)

  # z is 1 for speeches dated strictly after the cutoff and 0 otherwise.
  rdd_obj <- rdd_data(
    y = data$word,
    x = data$date_numeric,
    z = ifelse(data$date_numeric > cutpoint, 1, 0),
    cutpoint = cutpoint
  )
  rdd_reg_np(rdd_object = rdd_obj, bw = rdd_bw_ik(rdd_obj))
}

# ALTERNATIVE DICTIONARY, RANDOMLY GENERATED #1
random1 <- fit_random_dictionary(123, df_final, all_words, cutpoint_numeric)
print(random1)

# ALTERNATIVE DICTIONARY, RANDOMLY GENERATED #2
random2 <- fit_random_dictionary(456, df_final, all_words, cutpoint_numeric)
print(random2)

# ALTERNATIVE DICTIONARY, RANDOMLY GENERATED #3
random3 <- fit_random_dictionary(789, df_final, all_words, cutpoint_numeric)
print(random3)

# B8. Tests with alternative dictionaries

# ALTERNATIVE DICTIONARIES

# NEOLIBERALISM
# Outcome: matches of the alternative neoliberalism word stems per speech.
df_final[["word"]] <- str_count(
  df_final$tokens_edit,
  "konkurrenc|privatiser|liberalis|dereguler"
)
df_final_work <- df_final
cutpoint_date <- as.Date("1989-11-09")
cutpoint_numeric <- as.numeric(cutpoint_date)
df_final_work[["date_numeric"]] <- as.numeric(df_final_work$date)

# z is 1 for speeches dated strictly after the cutoff and 0 otherwise.
rdd_test <- with(
  df_final_work,
  rdd_data(
    y = word,
    x = date_numeric,
    z = as.numeric(date_numeric > cutpoint_numeric),
    cutpoint = cutpoint_numeric
  )
)
summary(rdd_test)
plot(rdd_test)

bw_ik <- rdd_bw_ik(rdd_test)
reg_nonpara <- rdd_reg_np(rdd_object = rdd_test, bw = bw_ik)
print(reg_nonpara)

# SOCIALISM
# Outcome: matches of the alternative socialism-discrediting word stems per speech.
df_final[["word"]] <- str_count(
  df_final$tokens_edit,
  "munkemarx|statstyranni|betonkommuni|statsdirig|statsstyr"
)
df_final_work <- df_final
cutpoint_date <- as.Date("1989-11-09")
cutpoint_numeric <- as.numeric(cutpoint_date)
df_final_work[["date_numeric"]] <- as.numeric(df_final_work$date)

# z is 1 for speeches dated strictly after the cutoff and 0 otherwise.
rdd_test <- with(
  df_final_work,
  rdd_data(
    y = word,
    x = date_numeric,
    z = as.numeric(date_numeric > cutpoint_numeric),
    cutpoint = cutpoint_numeric
  )
)
summary(rdd_test)
plot(rdd_test)

bw_ik <- rdd_bw_ik(rdd_test)
reg_nonpara <- rdd_reg_np(rdd_object = rdd_test, bw = bw_ik)
print(reg_nonpara)

# MANIFESTO PROJECT DATASET
# Manifesto-based dictionary. The terms are listed one per element and joined
# with "|" so that no line break or indentation ends up inside the regular
# expression. (A pattern written as one string literal spanning several source
# lines carries the newline and leading spaces into the alternatives that
# follow each break, so those terms can never match.)
manifesto_terms <- c(
  "fri markedsøkonomi",
  "frie marked",
  "laissez-faire",
  "individuelt entreprenørskab",
  "individuel entreprenør",
  "iværksætter",
  "privat ejendomsret",
  "privat ejer",
  "personligt initiativ"
)
df_final$word <- str_count(
  df_final$tokens_edit,
  paste(manifesto_terms, collapse = "|")
)

df_final_work <- df_final
cutpoint_date <- as.Date("1989-11-09")
cutpoint_numeric <- as.numeric(cutpoint_date)
df_final_work$date_numeric <- as.numeric(df_final_work$date)

# z is 1 for speeches dated strictly after the cutoff and 0 otherwise.
rdd_test <- rdd_data(
  y = df_final_work$word,
  x = df_final_work$date_numeric,
  z = ifelse(df_final_work$date_numeric > cutpoint_numeric, 1, 0),
  cutpoint = cutpoint_numeric
)
summary(rdd_test)
plot(rdd_test)

bw_ik <- rdd_bw_ik(rdd_test)
reg_nonpara <- rdd_reg_np(rdd_object = rdd_test, bw = bw_ik)
# Explicit print(), as in the other models of this section, so the result is
# also shown when the script is run through source().
print(reg_nonpara)

# B9. Test with covariates

# Neoliberalism

# Outcome: neoliberalism dictionary matches per speech.
df_final[["word"]] <- str_count(
  df_final$tokens_edit,
  "markedsøkonomi|frie marked|markedsreform"
)
# ussr = 1 for speeches dated 1991-12-26 through 2020-01-01 (bounds included).
df_final[["ussr"]] <- as.numeric(
  format(df_final$date, "%Y-%m-%d") %between% c("1991-12-26", "2020-01-01")
)

df_final_work <- df_final
cutpoint_date <- as.Date("1989-11-09")
cutpoint_numeric <- as.numeric(cutpoint_date)
df_final_work[["date_numeric"]] <- as.numeric(df_final_work$date)
df_final_work[["treatment"]] <- as.numeric(
  df_final_work$date_numeric > cutpoint_numeric
)

# Bandwidth from the rddtools model without covariates.
rdd_test <- with(
  df_final_work,
  rdd_data(y = word, x = date_numeric, z = treatment, cutpoint = cutpoint_numeric)
)
bw_ik <- rdd_bw_ik(rdd_test)

# Main estimate with the four covariates after the "|".
rd_est <- RDestimate(
  word ~ date_numeric + treatment |
    quarterly_growth + unemp_month + election + ussr,
  cutpoint = cutpoint_numeric, bw = bw_ik, data = df_final_work
)
summary(rd_est)

# Showing covariates manually as cov.est = TRUE does not work: each covariate
# in turn becomes the outcome, with `word` and the other covariates as controls.

rd_est <- RDestimate(
  quarterly_growth ~ date_numeric + treatment |
    word + unemp_month + election + ussr,
  cutpoint = cutpoint_numeric, bw = bw_ik, data = df_final_work
)
summary(rd_est)

rd_est <- RDestimate(
  unemp_month ~ date_numeric + treatment |
    word + quarterly_growth + election + ussr,
  cutpoint = cutpoint_numeric, bw = bw_ik, data = df_final_work
)
summary(rd_est)

rd_est <- RDestimate(
  election ~ date_numeric + treatment |
    word + quarterly_growth + unemp_month + ussr,
  cutpoint = cutpoint_numeric, bw = bw_ik, data = df_final_work
)
summary(rd_est)

rd_est <- RDestimate(
  ussr ~ date_numeric + treatment |
    word + quarterly_growth + unemp_month + election,
  cutpoint = cutpoint_numeric, bw = bw_ik, data = df_final_work
)
summary(rd_est)

# Socialism-discrediting

# Outcome: socialism-discrediting dictionary matches per speech.
df_final[["word"]] <- str_count(
  df_final$tokens_edit,
  "plantyranni|planøkono|kommandoøkono"
)
# ussr = 1 for speeches dated 1991-12-26 through 2020-01-01 (bounds included).
df_final[["ussr"]] <- as.numeric(
  format(df_final$date, "%Y-%m-%d") %between% c("1991-12-26", "2020-01-01")
)

df_final_work <- df_final
cutpoint_date <- as.Date("1989-11-09")
cutpoint_numeric <- as.numeric(cutpoint_date)
df_final_work[["date_numeric"]] <- as.numeric(df_final_work$date)
df_final_work[["treatment"]] <- as.numeric(
  df_final_work$date_numeric > cutpoint_numeric
)

# Bandwidth from the rddtools model without covariates.
rdd_test <- with(
  df_final_work,
  rdd_data(y = word, x = date_numeric, z = treatment, cutpoint = cutpoint_numeric)
)
bw_ik <- rdd_bw_ik(rdd_test)

# Main estimate with the four covariates after the "|".
rd_est <- RDestimate(
  word ~ date_numeric + treatment |
    quarterly_growth + unemp_month + election + ussr,
  cutpoint = cutpoint_numeric, bw = bw_ik, data = df_final_work
)
summary(rd_est)

# Each covariate in turn as the outcome, with `word` and the other covariates
# as controls.
rd_est <- RDestimate(
  quarterly_growth ~ date_numeric + treatment |
    word + unemp_month + election + ussr,
  cutpoint = cutpoint_numeric, bw = bw_ik, data = df_final_work
)
summary(rd_est)

rd_est <- RDestimate(
  unemp_month ~ date_numeric + treatment |
    word + quarterly_growth + election + ussr,
  cutpoint = cutpoint_numeric, bw = bw_ik, data = df_final_work
)
summary(rd_est)

rd_est <- RDestimate(
  election ~ date_numeric + treatment |
    word + quarterly_growth + unemp_month + ussr,
  cutpoint = cutpoint_numeric, bw = bw_ik, data = df_final_work
)
summary(rd_est)

rd_est <- RDestimate(
  ussr ~ date_numeric + treatment |
    word + quarterly_growth + unemp_month + election,
  cutpoint = cutpoint_numeric, bw = bw_ik, data = df_final_work
)
summary(rd_est)

# B10. No covariate jump at cutoff

# Load GDP quarterly data and unemployment monthly data
# (semicolon-separated files with a header row).
unemp_month <- read.csv(file = "unemp_month.csv", sep = ";", header = TRUE)
gdp_quart <- read.csv(file = "quarterly_growth.csv", sep = ";", header = TRUE)

# GDP, quarterly
# Remove "-Q" from the quarter label and read the rest as a number
# (presumably "1989-Q4" becomes 19894 - TODO confirm against the CSV).
gdp_quart$quarter_numeric <- as.numeric(gsub("-Q", "", gdp_quart$quarter))
# Decimal commas -> decimal points before converting to numeric.
gdp_quart$quarterly_growth <- as.numeric(gsub(",", ".", gdp_quart$quarterly_growth))

# One cutoff for both the treatment indicator and the RDD object. Previously z
# was split at 19894 while rdd_data() received a different literal cutpoint
# (19892); the unemployment model uses a single cutoff variable for both.
# NOTE(review): 19894 is assumed to be the intended cutoff - confirm.
# NOTE(review): this running variable is not evenly spaced across year
# boundaries (19894 -> 19901), which affects what bw = 50 covers.
cutpoint_numeric <- 19894
rdd_test <- rdd_data(
  y = gdp_quart$quarterly_growth,
  x = gdp_quart$quarter_numeric,
  z = ifelse(gdp_quart$quarter_numeric > cutpoint_numeric, 1, 0),
  cutpoint = cutpoint_numeric
)
reg_nonpara_gdp <- rdd_reg_np(rdd_object = rdd_test, bw = 50)

# Unemployment, monthly
# English month abbreviations -> two-digit month numbers, used to rewrite the
# yearmon labels into an all-numeric form.
months_map <- c(
  "Jan" = "01", "Feb" = "02", "Mar" = "03", "Apr" = "04",
  "May" = "05", "Jun" = "06", "Jul" = "07", "Aug" = "08",
  "Sep" = "09", "Oct" = "10", "Nov" = "11", "Dec" = "12"
)
unemp_month[["yearmon"]] <- str_replace_all(unemp_month$yearmon, months_map)

# Decimal commas -> decimal points before converting to numeric.
unemp_month[["unemp_month"]] <- as.numeric(gsub(",", ".", unemp_month$unemp_month))

# Drop the hyphens and read the label as one number.
unemp_month[["yearmon_numeric"]] <- as.numeric(gsub("-", "", unemp_month$yearmon))

cutpoint_numeric <- 198911
# z is 1 for months strictly after the cutoff value and 0 otherwise.
rdd_test <- with(
  unemp_month,
  rdd_data(
    y = unemp_month,
    x = yearmon_numeric,
    z = as.numeric(yearmon_numeric > cutpoint_numeric),
    cutpoint = cutpoint_numeric
  )
)
reg_nonpara_unemp <- rdd_reg_np(rdd_object = rdd_test, bw = 400)

# B11. Using Poisson regression

# Aggregate

# Neoliberalism

# Outcome: neoliberalism dictionary matches per speech.
df_final[["word"]] <- str_count(
  df_final$tokens_edit,
  "markedsøkonomi|frie marked|markedsreform"
)
df_final_work <- df_final
cutpoint_date <- as.Date("1989-11-09")
cutpoint_numeric <- as.numeric(cutpoint_date)
df_final_work[["date_numeric"]] <- as.numeric(df_final_work$date)

# The rddtools object is only needed to obtain the bandwidth.
rdd_test <- with(
  df_final_work,
  rdd_data(
    y = word,
    x = date_numeric,
    z = as.numeric(date_numeric > cutpoint_numeric),
    cutpoint = cutpoint_numeric
  )
)
bw_ik <- rdd_bw_ik(rdd_test)

# Speeches within one bandwidth of the cutoff on either side, with a 0/1
# indicator for dates strictly after the cutoff.
df_bandwidth <- df_final_work %>%
  filter(
    date_numeric >= (cutpoint_numeric - bw_ik) &
      date_numeric <= (cutpoint_numeric + bw_ik)
  ) %>%
  mutate(threshold = as.numeric(date_numeric > cutpoint_numeric))

# Quasi-Poisson GLM: jump at the cutoff plus a separate linear slope in the
# centred date on each side.
lm_diff_slope <- feglm(
  word ~ threshold + I(date_numeric - cutpoint_numeric) +
    threshold:I(date_numeric - cutpoint_numeric),
  data = df_bandwidth,
  family = quasipoisson()
)
summary(lm_diff_slope)

# Aggregate

# Discrediting socialism

# Outcome: socialism-discrediting dictionary matches per speech.
df_final[["word"]] <- str_count(
  df_final$tokens_edit,
  "plantyranni|planøkono|kommandoøkono"
)
df_final_work <- df_final
cutpoint_date <- as.Date("1989-11-09")
cutpoint_numeric <- as.numeric(cutpoint_date)
df_final_work[["date_numeric"]] <- as.numeric(df_final_work$date)

# The rddtools object is only needed to obtain the bandwidth.
rdd_test <- with(
  df_final_work,
  rdd_data(
    y = word,
    x = date_numeric,
    z = as.numeric(date_numeric > cutpoint_numeric),
    cutpoint = cutpoint_numeric
  )
)
bw_ik <- rdd_bw_ik(rdd_test)

# Speeches within one bandwidth of the cutoff on either side, with a 0/1
# indicator for dates strictly after the cutoff.
df_bandwidth <- df_final_work %>%
  filter(
    date_numeric >= (cutpoint_numeric - bw_ik) &
      date_numeric <= (cutpoint_numeric + bw_ik)
  ) %>%
  mutate(threshold = as.numeric(date_numeric > cutpoint_numeric))

# Quasi-Poisson GLM: jump at the cutoff plus a separate linear slope in the
# centred date on each side.
lm_diff_slope <- feglm(
  word ~ threshold + I(date_numeric - cutpoint_numeric) +
    threshold:I(date_numeric - cutpoint_numeric),
  data = df_bandwidth,
  family = quasipoisson()
)
summary(lm_diff_slope)